# -*- coding: utf-8 -*-
import copy
import json
import random

from scrapy import Request
from scrapy.exceptions import IgnoreRequest

from zc_core.spiders.base import BaseSpider
from zc_core.client.mongo_client import Mongo
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request

from chng.rules import *


class SkuSpider(BaseSpider):
    """Collect SKU listings by running the mall's product search per brand keyword.

    Every distinct brand keyword found in the ``meta_brand_pool`` collection
    is searched once. The first result page decides how the rest is fetched:

    * up to ``max_page_limit`` pages: the remaining pages are requested as is;
    * up to twice that: the result set is crawled once sorted by ascending
      price and once by descending price, each capped at ``max_page_limit``;
    * more than that: the search is re-issued with an additional group filter
      (channel or brand) and the same decision is made again per filter.

    ``self.batch_no`` and ``self.error_back`` are not defined in this class;
    they are expected to come from ``BaseSpider``.
    """

    name = 'sku'
    custom_settings = {
        'CONCURRENT_REQUESTS': 4,
        'DOWNLOAD_DELAY': 0.5,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 4,
        'CONCURRENT_REQUESTS_PER_IP': 4,
    }

    # Frequently used URLs
    cat_url = "http://ec.chng.com.cn/scm-hn-oauth-web/obs/business/product/Catrgory/query?channelTypeCode=101"
    sku_list_url = 'http://mall.ec.chng.com.cn/scm-hn-oauth-web/obs/business/product/ProductSearch/search'

    # Keywords whose oversized result sets are narrowed with a brand filter
    # built from the keyword itself instead of the groups in the response.
    brand_shortcut_keywords = ('得力', '京东', '齐心', '史泰博', '西域')

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider and its paging parameters.

        :param batchNo: batch identifier, forwarded unchanged to ``BaseSpider``.
        """
        super(SkuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Page limit: highest page number requested for a single query
        # (166 pages * 30 items = 4980 items).
        # NOTE(review): presumably the endpoint does not serve deeper pages - confirm.
        self.max_page_limit = 166
        self.page_size = 30

    def start_requests(self):
        """Yield one first-page search request per distinct brand keyword.

        The Chinese name, English name and main brand of every record in
        ``meta_brand_pool`` are used as keywords; empty values and keywords
        already seen are skipped.
        """
        seen_keywords = set()
        for brand in Mongo().list('meta_brand_pool'):
            for kw in (brand.get('cn'), brand.get('en'), brand.get('main_brand')):
                if not kw or kw in seen_keywords:
                    continue
                seen_keywords.add(kw)
                yield self._build_req(page=1, callback=self.parse_sku_list, kw=kw)
        # For a quick manual run, yield a single request instead, e.g. kw='3M'.

    def _build_req(self, page, callback, kw, sort=None, groups=None, groups_type=None):
        """Build the POST request for one page of search results.

        :param page: 1-based page number.
        :param callback: spider method that handles the response.
        :param kw: search keyword; when empty no request is built.
        :param sort: optional sort order such as ``"price@asc"``.
        :param groups: group filter values sent to the server (default: none).
        :param groups_type: kinds of the filters in ``groups``; only carried in
            ``meta`` so callbacks know which filters are already applied.
        :return: a ``Request``, or ``None`` when ``kw`` is empty.
        """
        if not kw:
            return None
        # None sentinels instead of shared mutable default lists.
        groups = [] if groups is None else groups
        groups_type = [] if groups_type is None else groups_type

        query = {
            "keywords": kw,
            "groups": [],
            "currentPage": page,
            "start": (page - 1) * self.page_size,
            "limit": self.page_size,
        }
        if sort:
            query["sort"] = sort
        if groups:
            query["groups"] = groups

        return Request(
            method='POST',
            url=self.sku_list_url,
            body=json.dumps(query),
            meta={
                'reqType': 'sku',
                'batchNo': self.batch_no,
                'page': page,
                'kw': kw,
                # Copies, so later changes by the caller cannot leak into meta.
                'groups': copy.copy(groups),
                'groups_type': copy.copy(groups_type),
            },
            headers={
                'Accept': 'application/json, text/plain, */*',
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Language': 'zh-CN,zh;q=0.9',
                'Cache-Control': 'no-cache',
                'Connection': 'keep-alive',
                'Content-Type': 'application/json',
                'Host': 'mall.ec.chng.com.cn',
                'Origin': 'http://mall.ec.chng.com.cn',
                'Pragma': 'no-cache',
                'Referer': 'http://mall.ec.chng.com.cn/list?keywords={}'.format(kw),
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'
            },
            callback=callback,
            errback=self.error_back,
            # Random priority in [10, 1000) spreads requests across the queue.
            priority=random.randrange(10, 1000),
            dont_filter=True,
        )

    def _narrowed_req(self, kw, groups, groups_type, group_type, group_value):
        """Build a first-page request with one more group filter applied.

        New lists are created, so the caller's ``groups`` and ``groups_type``
        are left untouched.
        """
        return self._build_req(
            page=1,
            callback=self.parse_sku_list,
            kw=kw,
            groups_type=groups_type + [group_type],
            groups=groups + [group_value],
        )

    def _narrow_search(self, kw, groups, groups_type, group_info, total_pages):
        """Return first-page requests that split an oversized result set.

        :param group_info: mapping from group kind (``'channelcode'``,
            ``'brandcode'``) to a list of options, each exposing a ``'code'``.
        :return: list of requests; empty when no further split is possible.
        """
        # The keyword-based brand filter is applied at most once. Appending it
        # again would re-issue an identical search forever, because
        # dont_filter=True disables duplicate filtering.
        if kw in self.brand_shortcut_keywords and 'brandcode' not in groups_type:
            brand_filter = 'brandcode:品牌:{}:{}'.format(kw, kw)
            return [self._narrowed_req(kw, groups, groups_type, 'brandcode', brand_filter)]

        group_info = group_info or {}
        # Split by channel first, then by brand; each kind is used only once.
        for group_type in ('channelcode', 'brandcode'):
            options = group_info.get(group_type)
            if options and group_type not in groups_type:
                return [
                    self._narrowed_req(kw, groups, groups_type, group_type, option.get('code'))
                    for option in options
                ]

        # Nothing left to split on: say so instead of dropping the rest silently.
        self.logger.warning('cannot narrow further: kw=%s, grp=%s, ttp=%s', kw, groups, total_pages)
        return []

    def parse_sku_list(self, response):
        """Handle the first page of a search and schedule the remaining work.

        Yields the page's SKUs wrapped in a ``Box``, then follow-up requests
        chosen by the total page count (see the class docstring).
        """
        meta = response.meta
        cur_page = meta.get("page")
        kw = meta.get("kw")
        groups = meta.get("groups", [])
        groups_type = meta.get("groups_type", [])

        # First page
        sku_list = parse_sku_list(response)
        if not sku_list:
            self.logger.info('空页: page=%s, kw=%s, grp=%s', cur_page, kw, groups)
            return

        self.logger.info('清单: kw=%s, grp=%s, page=%s, count=%s', kw, groups, cur_page, len(sku_list))
        yield Box('sku', self.batch_no, sku_list)

        total_pages, group_info = parse_query_info(response)
        page_cap = self.max_page_limit
        if total_pages <= page_cap:
            self.logger.info('页数1：kw=%s, grp=%s, ttp=%s', kw, groups, total_pages)
            # Within one page cap (about 5000 items): fetch the remaining pages.
            for page in range(2, total_pages + 1):
                yield self._build_req(page=page, callback=self.parse_more_sku, kw=kw, groups=groups,
                                      groups_type=groups_type)
        elif total_pages <= page_cap * 2:
            self.logger.info('页数2：kw=%s, grp=%s, ttp=%s', kw, groups, total_pages)
            # Between one and two page caps (about 5000 to 10000 items): crawl
            # from both ends of the price order. These start at page 1 because
            # the page handled above was unsorted, so the first page of each
            # sorted view has not been seen yet.
            for sort in ("price@asc", "price@desc"):
                for page in range(1, page_cap + 1):
                    yield self._build_req(page=page, callback=self.parse_more_sku, kw=kw, sort=sort,
                                          groups=groups, groups_type=groups_type)
        else:
            self.logger.info('页数3：kw=%s, grp=%s, ttp=%s', kw, groups, total_pages)
            # More than two page caps (over about 10000 items): add a filter
            # and let this callback decide again for every narrowed search.
            for req in self._narrow_search(kw, groups, groups_type, group_info, total_pages):
                yield req

    def parse_more_sku(self, response):
        """Handle a follow-up result page: yield its SKUs, or log an empty page."""
        meta = response.meta
        cur_page = meta.get("page")
        kw = meta.get("kw")
        groups = meta.get("groups", [])

        sku_list = parse_sku_list(response)
        if not sku_list:
            self.logger.info('空页: page=%s, kw=%s, grp=%s', cur_page, kw, groups)
            return

        self.logger.info('清单: kw=%s, grp=%s, page=%s, count=%s', kw, groups, cur_page, len(sku_list))
        yield Box('sku', self.batch_no, sku_list)
